##########    ESS partner analysis
##########    Jonne Kamphorst
##########    20/10/2021
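##########
##########     This file adds party codes to the ESS data (originally described as CHES codes; the code below merges Partyfacts IDs and Manifesto Project (CMP) party families). The code is copied from Sophie Hill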


# load packages
library(tidyverse) # for data wrangling
library(essurvey) # to download ESS data
# if necessary, install with this command:
# devtools::install_github("ropensci/essurvey")
library(sjlabelled) # to convert party vote choice into names
library(data.table) # for the "fread" function to quickly load large csv files

# Convenience wrapper around table() that adds an NA category whenever
# missing values occur in the input
tabl <- function(...) {
  table(..., useNA = "ifany")
}

# Downloading ESS data through the essurvey package requires an email address
# that is registered with the ESS.
# Register here: http://www.europeansocialsurvey.org/user/new
# and then fill in the registered address below.
# NOTE(review): a personal email address is hard-coded here; consider reading
# it from an environment variable before sharing this script.
essurvey::set_email("jonnekamphorst@hotmail.com")

# Load all available rounds 1-9.
# import_rounds() defaults to "stata" format, BUT there is an error with the
# haven package (more info: https://github.com/ropensci/essurvey/issues/44):
# with the stata format rounds 1-8 get imported with haven and round 9 with
# foreign, so rounds 1-8 and round 9 end up in different formats.
# Importing every round in SPSS format avoids that error.
ess_raw <- import_rounds(rounds = 1:9, format = "spss")


# Also import the ESS file that I have with Oesch in there
# NOTE(review): absolute, machine-specific path -- this only runs on a machine
# with the same folder layout.
ESS <- readRDS(file = "C:/Users/jonne/OneDrive/projects/schools/analysis/ESS partner/ESS/ESS18.rds")



# Now we need to create a function to:
# (i) select required variables from each of the 9 datasets
# (ii) create a generalized party vote choice variable, instead of having lots of country-round specific variables

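# note: for Germany there are TWO vote variables,
# since they cast 1 vote for a candidate ("prtvde1") and 1 vote for a party list ("prtvde2").
# The function below drops the variables ending in "de1", so the variables ending in "de2" are the ones that are kept and used.
# NOTE(review): this comment used to say the candidate vote is kept and "de2" is dropped, which is the opposite of what the code does -- confirm which one is intended.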

# You can add the variables you want to extract in the select function below
# Make sure to get the variable name exactly right: http://nesstar.ess.nsd.uib.no/webview/
# Use "starts_with()" / "ends_with()" to grab all variables starting / ending with that string
# Reduce one ESS round to the variables needed here and add two harmonised
# vote variables.
#
# x: one ESS round as a data frame (an element of the list returned by
#    import_rounds()). Presumably the vote columns carry value labels, since
#    as_label() is used to turn the codes into party names -- TODO confirm.
#
# Returns a plain data.frame holding the selected variables plus
#   party.vote.num  - first non-missing value across the country-specific
#                     "prtv..." columns
#   party.vote.name - the same, taken from the label (character) version of
#                     those columns
# The columns from the first to the last "prtvt..." variable are removed again
# before returning. Stops with an error if the round has no "prtv..." column.
es.df.clean <- function(x){
  esx <- x %>%
    select("essround", # REQUIRED: essround
           "idno", # REQUIRED: respondent ID
           "cntry", # REQUIRED: country
           starts_with("inw"), # REQUIRED: interview date (to match vote recall to specific election)
           "gndr", # gender
           "agea", # age
           starts_with("edulvl"), # educational attainment (several vars)
           starts_with("isco"), # occupation
           starts_with("prtv"), # party vote
           -ends_with("de1") # drop 1st German vote var (names ending in "de1")
    ) %>%
    as.data.frame()

  # positions of the country-specific vote variables; grep() returns them in
  # ascending order, so the first / last element is the first / last column
  vote.pos <- grep("prtv", colnames(esx))
  if (length(vote.pos) == 0) {
    stop("no country-specific vote variable (\"prtv...\") found", call. = FALSE)
  }

  # mini dataset of party choice vars: everything from the first to the last
  # vote column. Indexed by position in base R, so the positions can never be
  # mistaken for data columns (select() looks for columns of that name first).
  es.vote <- esx[, vote.pos[1]:vote.pos[length(vote.pos)], drop = FALSE]

  # create dataset-wide vote variable by merging the country-specific vars
  esx$party.vote.num <- as.vector(do.call(coalesce, es.vote))

  # convert numeric values into party names
  es.vote.named <- as_label(es.vote)
  # convert factors into characters to make sure they're stored properly
  es.vote.named[] <- lapply(es.vote.named, as.character)
  # create another dataset-wide vote variable, this time for the character variable
  esx$party.vote.name <- as.vector(do.call(coalesce, es.vote.named))
  # NOTE(review): no explicit UTF-8 conversion is applied to party.vote.name

  # delete unnecessary variables: the block of columns running from the first
  # to the last "prtvt..." variable
  prtvt.pos <- grep("prtvt", colnames(esx))
  if (length(prtvt.pos) > 0) {
    esx <- esx[, -(prtvt.pos[1]:prtvt.pos[length(prtvt.pos)]), drop = FALSE]
  }
  esx
}

# run the cleaning function over each of the 9 datasets in the list ...
ess_clean <- lapply(X = ess_raw, FUN = es.df.clean)
# ... and stack the 9 cleaned datasets into one
ess <- bind_rows(ess_clean)


# age: copy agea, set the value 999 to NA, then cut into age brackets
ess$age <- replace(ess$agea, ess$agea == 999, NA)
ess$age.group <- cut(ess$age, breaks = c(0, 20, 35, 50, 65, 75, 120))

# year: rounds 1-9 map onto 2002, 2004, ..., 2018 (i.e. 2000 + 2 * round);
# any other round value stays NA
ess$essround.year <- NA
for (r in seq(1, 9, by = 1)) {
  ess$essround.year[ess$essround == r] <- 2000 + 2 * r
}

# key of the form "<country>-<round>-<party code>", NA when no party code
ess$party.vote.ess <- with(
  ess,
  ifelse(is.na(party.vote.num), NA,
         paste0(cntry, "-", essround, "-", party.vote.num))
)


# load the ESS-Partyfacts extended crosswalk, build the same
# "<country>-<round>-<party code>" key as ess$party.vote.ess, and keep only the
# key plus the Partyfacts ID and name
cw_ess_pf <- read_csv(url("https://raw.githubusercontent.com/sophieehill/ess-partyfacts-crosswalk/master/ess-partyfacts-extended.csv")) %>%
  mutate(party.vote.ess = paste0(cntry, "-", essround, "-", ess_id)) %>%
  select(party.vote.ess, partyfacts_id, partyfacts_name)

# merge partyfacts IDs into main dataset
ess <- ess %>% left_join(cw_ess_pf, by = "party.vote.ess")


# now load the Partyfacts-External crosswalk and select the Manifesto dataset
# this lets us link those partyfacts IDs to *other* datasets
cw_pf <- read_csv(url("https://partyfacts.herokuapp.com/download/external-parties-csv/"))
# party IDs that are not numeric become NA here
cw_pf$dataset_party_id <- as.numeric(as.character(cw_pf$dataset_party_id))
# keep the Manifesto rows only and call their party ID "cmp_id"
cw_pf_cmp <- cw_pf %>%
  filter(dataset_key == "manifesto") %>%
  select(partyfacts_id, cmp_id = dataset_party_id)

# NOTE(review): if one partyfacts_id maps to several Manifesto IDs this join
# duplicates respondents -- verify against the crosswalk
ess <- ess %>% left_join(cw_pf_cmp, by = "partyfacts_id")
tabl(ess$cmp_id)



# Merge with CMP data to get party families
# Download latest CMP dataset
# (Use API or just load "cmp.csv")
library(manifestoR)
# set API key
# NOTE(review): the API key is hard-coded in the script; consider keeping it
# outside the source file before sharing.
mp_setapikey(key = "70af9d9d7f76a3d66d41142debe969f6")
# download latest dataset
cmp <- mp_maindataset() %>% as.data.frame()

# Keep only the row(s) with the latest date for each party. This means that
# for cases where a party changes family between the first and last ESS round,
# we are using the wrong party family. These cases are likely to be sparse and
# alternatives lead to too many missing
cmp.x <- cmp %>%
  select(cmp_id = party, date, partyname, cmp_parfam = parfam) %>%
  group_by(cmp_id) %>%
  top_n(n = 1, wt = date)


# join on the party ID only: every ESS respondent gets the latest CMP entry of
# the party, whatever the election year
ess <- ess %>% left_join(cmp.x, by = "cmp_id")
# alternatively we could match on exact election date
# (N.B. matching on election year would not work for cases where two elections
#  happen in the same year, and ESS fieldwork window covers the 2nd election)
# cmp.x$election.date <- as.Date(cmp.x$edate)
# ess$election.date <- as.Date(ess$ref.election)
# ess <- left_join(ess, cmp.x, by=c("cmp_id", "election.date"))

# create vote recall dummies (1 / 0, NA when the party family is missing)
# based on the CMP party family codes:
# 10 = ecological
# 20 = socialist or other left
# 30 = social democratic
# 40 = Liberal parties
# 70 = nationalist parties
ess <- ess %>%
  mutate(
    # families 10, 20 or 30
    vote.left = ifelse(cmp_parfam == 10 | cmp_parfam == 20 | cmp_parfam == 30, 1, 0),
    # family 70
    vote.rrp = ifelse(cmp_parfam == 70, 1, 0),
    # family 10
    vote.prog = ifelse(cmp_parfam == 10, 1, 0)
  )
tabl(ess$vote.left)
tabl(ess$vote.rrp)
tabl(ess$vote.prog)


# save the variables we need: one row per idno / cntry / essround combination
# (distinct() keeps the first row of each combination), restricted to the
# party variables created above
essx <- ess %>%
  distinct(idno, cntry, essround, .keep_all = TRUE) %>%
  select(idno, cntry, essround, vote.rrp, vote.prog,
         age.group, party.vote.ess, partyfacts_id, partyfacts_name,
         cmp_id, cmp_parfam, vote.left) %>%
  as.data.frame()


# add mine and Sophie's data set together
# No `by` is given, so left_join() matches on every column name that ESS and
# essx have in common.
# NOTE(review): presumably the intended keys are idno, cntry and essround --
# check that ESS shares no other column name with essx.
ESS2 <- left_join(ESS, essx)





# NOTE(review): relative path -- depends on the working directory
saveRDS(ESS2, file = "analysis/ESS partner/ESS/ESS_complete.rds")

